waterQUAC:: Water Data Quality Control

Cameron Roberts

2024-02-19

Introduction

This R library has been developed as a toolset for processing high-frequency water quality and quantity data. It provides functions for easily extracting data from common API endpoints:

#devtools::install_github("https://github.com/UncleCamsWaterPlans/waterQUAC")
library(waterQUAC)
library(plotly)
#> Warning: package 'plotly' was built under R version 4.2.3
#> Loading required package: ggplot2
#> Warning: package 'ggplot2' was built under R version 4.2.3
#> 
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#> 
#>     last_plot
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following object is masked from 'package:graphics':
#> 
#>     layout


# Import discharge data from WMIP (Herbert River at Ingham - site 116001F).
# The last two arguments look like YYYYMMDD start/end dates - confirm against
# the wmip_hist() documentation.
discharge <- waterQUAC::wmip_hist(
  "116001F",
  "discharge",
  "AT",
  "20220701",
  "20230630"
)
#> Rows: 8737 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (1): varname
#> dbl (5): site, var, time, value, quality
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Extract gridded weather observation data (SILO) for that location, over the
# same start/finish dates used for the discharge request.
rain <- waterQUAC::silo_grid(
  lat = "-18.62831",
  long = "146.16486",
  start = "20220701",
  finish = "20230630",
  username = "example@email.com.au"
)
#> Rows: 1 Columns: 858
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\""
#> chr (397): X3, X11, X15, X19, X23, X27, X31, X35, X39, X43, X47, X51, X55, X...
#> dbl   (1): X2
#> lgl (460): X1, X4, X5, X6, X7, X8, X9, X10, X12, X13, X14, X16, X17, X18, X2...
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Plot daily rainfall against stream discharge.
# Discharge is drawn as a filled line on the primary y axis; rainfall is drawn
# as bars on a second, reversed axis (y3) so the bars hang from the top.
discharge %>%
  plot_ly() %>%
  add_trace(
    x = ~time,
    y = ~value,
    type = "scatter",
    mode = "lines",
    name = "Stream Discharge (m^3/s)",
    fill = "tozeroy",
    # "tozeroy" is a fill mode, not a colour, so no line colour is set here and
    # plotly's default trace colour is used for both the line and the fill.
    line = list(
      width = 2.5,
      dash = "solid"
    ),
    connectgaps = TRUE
  ) %>%
  add_trace(
    # `rain` is a separate object from the piped `discharge` data, so its
    # columns are referenced explicitly.
    x = ~ rain$Date,
    y = ~ rain$Rain,
    type = "bar",
    yaxis = "y3",
    name = "Daily Rainfall",
    # Bar markers take colour and opacity; they have no `size` attribute.
    marker = list(
      color = "darkblue",
      opacity = 0.3
    )
  ) %>%
  layout(
    legend = list(orientation = "h"),
    # The x axis stops at 85% of the width to leave room for the rainfall axis.
    xaxis = list(
      title = FALSE,
      showgrid = FALSE,
      domain = c(0, 0.85)
    ),
    # Primary axis: labelled to match the discharge trace plotted on it.
    yaxis = list(
      title = list(
        text = "<b>Stream Discharge</b> (m<sup>3</sup>/s)",
        font = list(size = 15)
      ),
      showgrid = FALSE,
      side = "right"
    ),
    # Rainfall axis: overlays the primary axis, floats free at 92% of the plot
    # width, and is reversed so zero sits at the top.
    yaxis3 = list(
      tickfont = list(color = "darkblue"),
      showgrid = FALSE,
      overlaying = "y",
      side = "right",
      anchor = "free",
      position = 0.92,
      autorange = "reversed",
      title = list(text = "<b>Rainfall</b> (mm)", font = list(size = 15))
    )
  ) %>%
  # Add shape-drawing buttons to the modebar
  config(
    modeBarButtonsToAdd = list(
      "drawline",
      "drawopenpath",
      "drawclosedpath",
      "drawcircle",
      "drawrect",
      "eraseshape"
    )
  )

Time series quality coding

Note the various macros within the vignette section of the metadata block above. These are required in order to instruct R how to build the vignette. Note that you should change the title field and the \VignetteIndexEntry to match the title of your vignette.

# Load a test dataset for this example (first 3000 rows of the bundled TSS
# data). The printed rows show the data frame layout required as input.
df <- waterQUAC::TSS_data[seq_len(3000), ]

head(df)
#> # A tibble: 6 × 3
#>   ts                  Value Quality
#>   <dttm>              <dbl>   <int>
#> 1 2020-12-02 17:07:00  40.4     204
#> 2 2020-12-09 08:12:00  45.0     223
#> 3 2020-12-09 08:30:00  42.9     223
#> 4 2020-12-09 08:45:00  45.2     223
#> 5 2020-12-09 09:00:00  48.1     223
#> 6 2020-12-09 09:15:00  45.9     223
# Example Total Suspended Solids data frame (full bundled dataset)
df <- waterQUAC::TSS_data

# Overwritable QC codes; all others are retained. In this case, all codes will
# be overwritten.
manual_codes <- 1:4000

# Upper and lower limits for the sensor used (Trios Opus)
sensorMin <- 0
sensorMax <- 650

# Pass the limits defined above rather than repeating the literals, so the
# values only need to be changed in one place.
tst <- ts_anom(
  df = df,
  overwrite = manual_codes,
  sensorMin = sensorMin,
  sensorMax = sensorMax
)

# Plot the values over time, coloured by quality code.
# add_markers() sets the scatter trace type itself, so `type` is not supplied.
tst |>
  plotly::plot_ly() |>
  plotly::add_markers(
    x = ~ts,
    y = ~Value,
    color = ~Quality
  )